timsort: Make sure merges don't take too long

author Benjamin Otte <otte@redhat.com>

Sat, 11 Jul 2020 18:34:16 +0000 (20:34 +0200)

committer Benjamin Otte <otte@redhat.com>

Wed, 22 Jul 2020 12:04:40 +0000 (14:04 +0200)
author Benjamin Otte <otte@redhat.com>
Sat, 11 Jul 2020 18:34:16 +0000 (20:34 +0200)
committer Benjamin Otte <otte@redhat.com>
Wed, 22 Jul 2020 12:04:40 +0000 (14:04 +0200)
diff --git a/gtk/gtktimsort-impl.c b/gtk/gtktimsort-impl.c

index 5dd1c98d3f4cdea6f9f454e6e68f00d355f79091..3e048324e2bd7f33fe6157a64778a6a3becf88dc 100644 (file)
--- a/gtk/gtktimsort-impl.c
+++ b/gtk/gtktimsort-impl.c
@@ -805,18 +805,6 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
    g_assert (len1 > 0 && len2 > 0);
    g_assert (ELEM (base1, len1) == base2);
  
-  /*
-   * Record the length of the combined runs; if i is the 3rd-last
-   * run now, also slide over the last run (which isn't involved
-   * in this merge).  The current run (i+1) goes away in any case.
-   */
-  self->run[i].len = len1 + len2;
-  if (i == self->pending_runs - 3)
-    {
-      self->run[i + 1] = self->run[i + 2];
-    }
-  self->pending_runs--;
-
    /*
     * Find where the first element of run2 goes in run1. Prior elements
     * in run1 can be ignored (because they're already in place).
@@ -825,7 +813,7 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
    base1 = ELEM (base1, k);
    len1 -= k;
    if (len1 == 0)
-    return;
+    goto done;
  
    /*
     * Find where the last element of run1 goes in run2. Subsequent elements
@@ -835,13 +823,53 @@ gtk_tim_sort(merge_at) (GtkTimSort *self,
                                      ELEM (base1, len1 - 1),
                                      base2, len2, len2 - 1);
    if (len2 == 0)
-    return;
+    goto done;
  
    /* Merge remaining runs, using tmp array with min(len1, len2) elements */
    if (len1 <= len2)
-    gtk_tim_sort(merge_lo) (self, base1, len1, base2, len2);
+    {
+      if (len1 > MAX_MERGE_PER_RUN)
+        {
+          base1 = ELEM (self->run[i].base, self->run[i].len - MAX_MERGE_PER_RUN);
+          gtk_tim_sort(merge_lo) (self, base1, MAX_MERGE_PER_RUN, base2, len2);
+          self->run[i].len -= MAX_MERGE_PER_RUN;
+          self->run[i + 1].base = ELEM (self->run[i + 1].base, - MAX_MERGE_PER_RUN);
+          self->run[i + 1].len += MAX_MERGE_PER_RUN;
+          g_assert (ELEM (self->run[i].base, self->run[i].len) == self->run[i + 1].base);
+          return;
+        }
+      else
+        {
+          gtk_tim_sort(merge_lo) (self, base1, len1, base2, len2);
+        }
+    }
    else
-    gtk_tim_sort(merge_hi) (self, base1, len1, base2, len2);
+    {
+      if (len2 > MAX_MERGE_PER_RUN)
+        {
+          gtk_tim_sort(merge_hi) (self, base1, len1, base2, MAX_MERGE_PER_RUN);
+          self->run[i].len += MAX_MERGE_PER_RUN;
+          self->run[i + 1].base = ELEM (self->run[i + 1].base, MAX_MERGE_PER_RUN);
+          self->run[i + 1].len -= MAX_MERGE_PER_RUN;
+          g_assert (ELEM (self->run[i].base, self->run[i].len) == self->run[i + 1].base);
+          return;
+        }
+      else
+        {
+          gtk_tim_sort(merge_hi) (self, base1, len1, base2, len2);
+        }
+    }
+
+done:
+  /*
+   * Record the length of the combined runs; if i is the 3rd-last
+   * run now, also slide over the last run (which isn't involved
+   * in this merge).  The current run (i+1) goes away in any case.
+   */
+  self->run[i].len += self->run[i + 1].len;
+  if (i == self->pending_runs - 3)
+    self->run[i + 1] = self->run[i + 2];
+  self->pending_runs--;
  }
  
  
diff --git a/gtk/gtktimsort.c b/gtk/gtktimsort.c

index c59682cebddc39c5d9ceb5ac39fab512739fdc16..38a46880e863eecd0baef6578ec65314a40b0513 100644 (file)
--- a/gtk/gtktimsort.c
+++ b/gtk/gtktimsort.c
@@ -25,6 +25,15 @@
   */
  #define MIN_MERGE 32
  
+/*
+ * Limit the amount of work done when merging. This ensures that the step
+ * function does not take too much time. Of coure, there's overhead associated
+ * with splitting a merge into miultliple merges.
+ *
+ * So lowering this number will make the average runtime of the step function
+ * faster but increase the total runtime.
+ */
+#define MAX_MERGE_PER_RUN 1024
  
  /*
   * When we get into galloping mode, we stay there until both runs win less
author	Benjamin Otte <otte@redhat.com>
	Sat, 11 Jul 2020 18:34:16 +0000 (20:34 +0200)
committer	Benjamin Otte <otte@redhat.com>
	Wed, 22 Jul 2020 12:04:40 +0000 (14:04 +0200)
gtk/gtktimsort-impl.c		patch \| blob \| history
gtk/gtktimsort.c		patch \| blob \| history